library(haven)
library(foreign)
library(readxl)
library(openxlsx)
library(car)
library(dplyr)
library(readstata13)

# Import data
d <- read_excel("Data for Affluence and influence in a social democracy.xlsx")

######################################################################
#---------------------------- START ---------------------------------# 
######################################################################

# Calculate total n in each category
d$S_HINC1 <- (d$HINC1_FAV+d$HINC1_OPP)
d$S_HINC2 <- (d$HINC2_FAV+d$HINC2_OPP)
d$S_HINC3 <- (d$HINC3_FAV+d$HINC3_OPP)
d$S_HINC4 <- (d$HINC4_FAV+d$HINC4_OPP)
d$S_HINC5 <- (d$HINC5_FAV+d$HINC5_OPP)
d$S_HINC6 <- (d$HINC6_FAV+d$HINC6_OPP)
d$S_HINC7 <- (d$HINC7_FAV+d$HINC7_OPP)
d$S_HINC8 <- (d$HINC8_FAV+d$HINC8_OPP)
d$S_HINC9 <- (d$HINC9_FAV+d$HINC9_OPP)
d$S_HINC10 <- (d$HINC10_FAV+d$HINC10_OPP)
d$S_HINC11 <- (d$HINC11_FAV+d$HINC11_OPP)
d$S_HINC12 <- (d$HINC12_FAV+d$HINC12_OPP)

# Calculate % support in each category 
d$pcFAV_INC1 <- d$HINC1_FAV/(d$HINC1_FAV+d$HINC1_OPP)
d$pcFAV_INC2 <- d$HINC2_FAV/(d$HINC2_FAV+d$HINC2_OPP)
d$pcFAV_INC3 <- d$HINC3_FAV/(d$HINC3_FAV+d$HINC3_OPP)
d$pcFAV_INC4 <- d$HINC4_FAV/(d$HINC4_FAV+d$HINC4_OPP)
d$pcFAV_INC5 <- d$HINC5_FAV/(d$HINC5_FAV+d$HINC5_OPP)
d$pcFAV_INC6 <- d$HINC6_FAV/(d$HINC6_FAV+d$HINC6_OPP)
d$pcFAV_INC7 <- d$HINC7_FAV/(d$HINC7_FAV+d$HINC7_OPP)
d$pcFAV_INC8 <- d$HINC8_FAV/(d$HINC8_FAV+d$HINC8_OPP)
d$pcFAV_INC9 <- d$HINC9_FAV/(d$HINC9_FAV+d$HINC9_OPP)
d$pcFAV_INC10 <- d$HINC10_FAV/(d$HINC10_FAV+d$HINC10_OPP)
d$pcFAV_INC11 <- d$HINC11_FAV/(d$HINC11_FAV+d$HINC11_OPP)
d$pcFAV_INC12 <- d$HINC12_FAV/(d$HINC12_FAV+d$HINC12_OPP)

# Make prediction variables
d$incpred90 <- NA
d$incpred70 <- NA
d$incpred50 <- NA
d$incpred30 <- NA
d$incpred10 <- NA

# For-loop that for each row finds the percentile midpoint for each category 
# in that survey's income variable, then uses those scores as IV and the group's 
# preference (% support) as DV in a  logistic regression model weigted by the 
# n of each group. Then uses resulting coefficients to impute preferences of 
# desired percentiles (e.g. 90th, 70th, 50th, etc.).   
for(i in 1:nrow(d)){
  drow <- d[i,]
  d1 <- drow %>% dplyr::select(starts_with("S_HINC"))
  d1 <- as.data.frame(t(d1))
  d1$prop <- d1$V1/sum(d1$V1, na.rm=T)
  d1$cumu <- cumsum(d1$prop)
  d1 <- mutate(d1, score = ((cumu - lag(cumu))/2)+lag(cumu)) # Make percentile midpoint scores
  d1$score[1] <- d1$cumu[1]-(d1$cumu[1]/2) # Make percentile midpoint score for first category since formula above returns NA for that one
  d1$income <- d1$Var1
  d1 <- add_rownames(d1, "income")
  names(d1)[names(d1)=="V1"] <- "n"
  d1 <- dplyr::select(d1, income, score, n)
  d2 <- drow %>% dplyr::select(starts_with("pcFAV_INC"))
  d2 <- as.data.frame(t(d2)) 
  d2$income <- 1:nrow(d2) 
  names(d2)[names(d2)=="V1"] <- "sup"
  d3 <- merge(d1,d2, by="income")
  m1 <- glm(sup ~ score + I(score^2), data=d3, weights=n, family = "binomial") # Income and income^2 as IVs, just like Gilens (2012, 61)
  d4 <- data.frame(score=c(0.90, 0.70, 0.50, 0.30, 0.10))
  d4$pred <- predict(m1, d4, type="response")
  d$incpred90[i] <- d4$pred[1]
  d$incpred70[i] <- d4$pred[2]
  d$incpred50[i] <- d4$pred[3]
  d$incpred30[i] <- d4$pred[4]
  d$incpred10[i] <- d4$pred[5]
}

write.xlsx(d, "Data for Affluence and influence in a social democracy.xlsx") # Writes out master dataset with the new variables added

######################################################################
#----------------------------- END ----------------------------------# 
######################################################################




